ADA Programming Guide

home *** CD-ROM | disk | FTP | other *** search

/ ADA Programming Guide / ADA Programming Guide.iso / ada_gwu / adaed / math / math.adb < prev next >

Wrap

Text File | 1996-01-30 | 61.3 KB | 1,930 lines

-- ----------------------------------------------------------------------- -- Title: Float_elementary_functions -- Last Mod: Thu Apr 19 11:26:27 1990 -- Author: Vincent Broman <broman@nosc.mil> -- Copyright 1990 Vincent Broman -- Permission granted to copy, modify, or compile this software for -- one's own use, provided that this copyright notice is preserved intact. -- Permission granted to distribute compiled binary copies of this -- software which are linked in with some other application. -- Permission granted to distribute other copies of this software, -- provided that (1) any copy which is not source code, i.e. not in the -- form in which the software is usually maintained, must be accompanied -- by a copy of the source code from which it was compiled, and (2) the -- one distributing it must refrain from imposing on the recipient -- further restrictions on the distribution of this software. -- -- Description: -- -- a Float-precision-only implementation of -- the proposed standard generic package of elementary functions, -- from the ISO-IEC/JTC1/SC22/WG9 (Ada) -- Numerics Rapporteur Group proposal, Draft 1.1. -- -- This will evolve to conform to later drafts. -- -- Exceptions: argument_error, numeric_error are raised. -- ----------------------------------------------------------------------- -- with math_constants; use math_constants; with primitive_functions; use primitive_functions; package body Math is overflow_error: exception renames numeric_error; -- AI-00387 specifies constraint_error for floating overflows. fminexp: constant Float := Float( Float'machine_emin - 1); fmaxexp: constant Float := Float( Float'machine_emax); mant: constant := Float'machine_mantissa; recip_sqrt_epsilon: constant Float := 2.0 ** ((mant - 1) / 2); two_pi: constant := 2.0 * pi; half_pi: constant := pi / 2.0; subtype octant_number is integer range 0 .. 7; function sqr (x: Float) return Float is -- begin return x * x; end sqr; pragma inline (sqr); ---------------------------------------------------------------------- -- high precision argument reduction routines procedure octant_reduce( x: in Float; r: out Float; nmod8: out octant_number) is -- -- Octant_reduce reduces x to the interval [0, pi/4] with the high -- absolute accuracy needed by trigonometric functions computed in radians -- which are subject to stringent relative accuracy requirements. -- -- R is in 0.0 .. pi/4 and, to within the accuracy described below, -- either (abs( x) - r) is equal to pi/4 times an even integer -- or else (abs( x) - (pi/4 - r)) is equal to pi/4 times an odd integer. -- The angles in odd octants are complemented relative to pi/4 so that -- small sines and cosines may be computed from small angles, -- i.e. r is small anywhere near the x and y coordinate axes. -- Further, r=0 iff x=0, because of the way huge arguments get reduced. -- -- NMOD8 is the octant number of x, in 0 .. 7, i.e. -- for x >= 0 nmod8 is truncate( x / (pi/4)) mod 8, -- for x < 0 nmod8 is 7 - truncate( - x / (pi/4)) mod 8. -- -- The relative accuracy of the reduced angle corresponding to (r,nmod8) -- is better than 2 ulp if abs( x) < 6433 < pi/4*2**13. -- -- The inexactness of (r,nmod8) is caused entirely by -- (1) inexact reduction of huge arguments into the interval [-6433,6433]. -- (2) the difference of 1.01e-21 between pi/4 and sum( pterm), -- (3) the two roundings in producing r itself. -- If abs(x) < 6433, the absolute error in r caused by (1) and (2) -- is less than (sum(pterm)-pi/4)*abs(x)/(pi/4) < 8.3e-18 . -- -- On (0, 22000) the maximum value of abs( x / (sin( x)*cos( x))) -- for x a Float precision floating point number is less than 6.1e10, -- so the maximum relative error in r before rounding in (3) is less than -- 6.1e10*1.01e-21/(pi/4) < 7.85e-11, which is .00066 ulp. -- -- For arguments abs( x) > 6433, x is reduced and rounded more than -- once, so an extra absolute error is introduced, bounded by: -- max( 2**(exponent(2pi)-1-24), abs(x)*2**(-13+1-24)), -- where the former bound applies if reducing by whole periods produces a -- remainder small than 2pi, the latter bound otherwise. -- -- Test your own system with 161*pi/2 to see how well argument reduction -- is done. You should get cos(252.8982086181640625) =~ -4.1857068922e-9 . -- nbits: constant := 13; -- significant bits in n oneminus: constant := 1.0 - 3.0 * Float'epsilon; -- 3 minimal? toobig: constant := pi / 4.0 * (2.0 ** nbits) * oneminus; -- 6433.3+ p: constant Float := pi / 4.0; ax: Float := abs( x); n: Float; odd_n: boolean; function truncate_nbits (x: Float) return Float is -- -- truncate to an integer with nbits or fewer significant bits -- begin -- truncate_nbits if exponent( x) < nbits then return truncate( x); else return shorten( x, nbits); end if; end truncate_nbits; function reduce_by_n (x: Float; n: Float) return Float is -- -- compute x - n * sum(pterm(k),k=1..7) -- erring by less than two roundoffs in the final result. -- x and n are assumed nonnegative. n has an nbit-length integer value. -- type softreal is array( 1 .. 7) of Float; -- -- The high precision eighth-period: -- -- sum(pterm(k),k=1..7) = pi/4 rounded to 68 bits, in 7 8-bit pieces. pterm: constant softreal := (16#0.c9000000000000000#, 16#0.000fd000000000000#, 16#0.00000aa0000000000#, 16#0.00000002200000000#, 16#0.00000000016800000#, 16#0.000000000000c2000#, 16#0.0000000000000034c#); -- -- each product of a 13 bit integer and an 8 bit pterm(k) computed below -- lands exactly on a 21 bit model number, no roundoff. -- diff: Float := x; sumtail: Float := 0.0; prod, nextdiff: Float; begin -- reduce_by_n for k in softreal'range loop -- combine largest terms while the subtraction is still exact, -- then sum tail to combine with head all at once. prod := n * pterm( k); nextdiff := diff - prod; if abs( nextdiff) > prod then -- nextdiff might be inexact for m in reverse k + 1 .. softreal'last loop sumtail := sumtail + n * pterm( m); end loop; return diff - (prod + sumtail); -- two roundings end if; diff := nextdiff; -- difference is still exact end loop; return diff; end reduce_by_n; function mod8 (x: Float) return octant_number is -- -- return x mod 8 for x having a nonnegative integer value. -- xo8: Float := x * (1.0 / 8.0); begin -- mod8 return octant_number( (xo8 - truncate( xo8)) * 8.0); end mod8; begin -- octant_reduce -- if ax too big to handle accurately then reduce by whole periods first while ax > toobig loop n := 8.0 * truncate_nbits( ax / (8.0 * p) * oneminus); -- n < ax / p ax := reduce_by_n( ax, n); end loop; -- -- now ax < pi/4*2**nbits -- n := truncate( ax / p); -- 13 or fewer bits odd_n := odd( n); if odd_n then -- odd octant, complement angle wrt pi/4 -- p - (ax - n*p) = -ax + (n+1)*p ax := - reduce_by_n( ax, n + 1.0); elsif n > 0.0 then -- for even nbr of octants, ax - n*p ax := reduce_by_n( ax, n); else -- n = 0 null; end if; if ax < 0.0 then -- initial n was off by 1 because of roundoff in ax/p r := - ax; if odd_n then n := n + 1.0; else n := n - 1.0; end if; elsif ax > p then -- initial n was off by 1 because of roundoff in ax/p -- want 2 * p - ax r := - reduce_by_n( ax, 2.0); if odd_n then n := n - 1.0; else n := n + 1.0; end if; else -- initial n correct, vast majority of cases r := ax; end if; if x >= 0.0 then nmod8 := mod8( n); else nmod8 := octant_number'last - mod8( n); end if; end octant_reduce; procedure period_reduce( x: in Float; p: in Float; r: out Float; n: out octant_number) is -- -- Period_reduce reduces x to the interval [0, p/8] with the high -- absolute accuracy needed by trigonometric functions -- which are subject to stringent relative accuracy requirements. -- -- X is the input angle, in units such that p is the cyclic period. -- Execution is erroneous if p is not positive. -- R is in 0.0 .. p/8 and, to within the accuracy described below, -- either (abs( x) - r) is equal to p/8 times an even integer -- or else (abs( x) - (p/8 - r)) is equal to p/8 times an odd integer. -- The angles in odd octants are complemented relative to p/8 so that -- small sines and cosines may be computed from small angles, -- i.e. r is small anywhere near the x and y coordinate axes. -- -- N is the octant number of x, in 0 .. 7, i.e. -- for x >= 0 n is truncate( x / (p/8)) mod 8, -- for x < 0 n is 7 - truncate( - x / (p/8)) mod 8. -- -- The reduced angle corresponding to (r,n) is rounded and -- correct up to the relative machine accuracy of r. -- All floating point arithmetic used is exact except possibly for -- (1) complementing r in an odd octant, computing (p/8 - r1), -- (2) the last reductions, if p/8 loses bits by denormalization. -- r1: Float := abs( x); exponent_diff: constant integer := exponent( r1) - exponent( p); p1: Float := scale( p, exponent_diff); -- p1 & r1 now have same exponent s: integer := exponent_diff + 3; octant: octant_number := 0; begin -- period reduce if s >= 0 then -- We repeatedly subtract (p times a power of 2) from r. loop if r1 >= p1 then -- It is assumed that the floating point subtraction of -- two positives (a-b) is exact if either -- (1) exponent(a) = exponent(b) or -- (2) exponent(a)-1 = exponent(b) >= exponent(a-b). -- This is the case for (r1-p1). r1 := r1 - p1; -- case s is when 0 => r1 := p1 - r1; -- odd octant, complement octant := octant + 1; when 1 => octant := octant + 2; when 2 => octant := octant + 4; when others => null; end case; end if; -- if r1 >= p1 exit when s = 0; s := s - 1; p1 := p1 * 0.5; -- could scale if that's faster end loop; -- down s r := r1; else r := r1; -- small angle, no reduction end if; -- if s nonnegative if x >= 0.0 then n := octant; else n := octant_number'last - octant; -- no effect on odd_octant end if; end period_reduce; procedure ln2_reduce( x: in Float; r: out Float; n: out integer) is -- -- Ln2_reduce reduces x to an interval of width approximately ln(2), -- with the high absolute accuracy needed by an exponential function which is -- subject to stringent relative accuracy requirements. -- -- This routine requires abs( x) < 89.0 for spec accuracy. -- -- Upon return, x = r + n * ln(2), -- to within an absolute accuracy of 1 * epsilon (using the Ada model), -- or more realistically to within 6e-8 . -- Also, abs( r) <= nat_log_2 / 2 + 256 * epsilon, -- and n is in the range -128 .. 128, -- -- The inexactness of the resultant (r,n) is caused entirely by -- (1) the negligible difference of about 5.5e-14 between ln(2) and ln2, -- (2) the 1 or 2 roundings in producing the result r itself. -- ln2_a: constant := 16#0.b1700000000#; ln2_b: constant := 16#0.000217f0000#; ln2_c: constant := 16#0.00000007d1c#; -- ln(2) = ln2_a+ln2_b+ln2_c accurate to 43 bits, in 3 14-bit pieces. nrd: Float := floor( x / nat_log_2 + 0.5); begin -- ln2_reduce r := ((x - ln2_a * nrd) - ln2_b * nrd) - ln2_c * nrd; -- The multiplications did happen exactly, using model numbers. -- The a term subtracts exactly. The c term is so small -- that a bound on the absolute error can depend only on the size -- of the result of the second subtraction. So, two roundoffs -- make the absolute error < 1*epsilon, pessimistically. n := integer( nrd); -- if we wanted to eliminate slop in the output interval, -- then just test r, adjust n, and repeat the reduction. end ln2_reduce; function complement (x: in Float) return Float is -- compute sqrt(1-x**2) ax: constant Float := abs( x); begin if ax >= 0.5 then declare t: constant Float := 1.0 - ax; begin return sqrt( 2.0 * t - t * t); end; else return sqrt( (1.0 + x) * (1.0 - x)); end if; end complement; ---------------------------------------------------------------------- -- functions on reduced or simplified domains function reduced_logdel_2 (y: in Float) return Float is -- -- return the base-2 logarithm of (1+y)/(1-y), -- y in the interval [ -3+sqrt(8) .. 3-sqrt(8) ]. -- the coefficients are from a rational (even) 1,1 approximation to -- log_2( (1+y)/(1-y) )/y with relative error (omitting roundoff) < 1.91e-8. -- this delivers high relative accuracy for small y. -- Note: reduced_logdel_2( 0.0) = 0.0 exactly. -- p0: constant := -4.7845025; p1: constant := 1.2906108; q0: constant := -1.6581823; y2: constant Float := y * y; begin return y * (p0 + p1 * y2) / (q0 + y2); end reduced_logdel_2; function reduced_log_2 (x: in Float) return Float is -- -- return the base-2 logarithm of x, x in the interval [sqrt(1/2),sqrt(2)]. -- begin return reduced_logdel_2( (x - 1.0) / (x + 1.0)); end reduced_log_2; procedure log_2_parts (int_part: out Float; frac_part: out Float; x: in Float) is -- -- return the integer and fractional part of the -- base-2 logarithm of x. -- int_part is a floating pt integer, and abs( frac_part) <= 1/2. -- ex: integer := exponent( x); ma: Float := mantissa( x); sqrt_half: constant := sqrt_2 / 2.0; begin if ma < sqrt_half then ma := 2.0 * ma; ex := ex - 1; end if; -- sqrt_half <= ma < sqrt_2 -- and x = ma * 2 ** ex int_part := Float( ex); frac_part := reduced_log_2( ma); end log_2_parts; function log_2 (x: in Float) return Float is -- -- return the base-2 logarithm of x, assuming x > 0.0 -- -- the value is exact for x an integer power of 2. -- n, f: Float; begin -- log_2 log_2_parts( n, f, x); return n + f; end log_2; function reduced_two_to_the (x: in Float) return Float is -- -- return 2.0**x for x in the interval [-0.524, 0.524]. -- the coefficients are a Remez-optimized rational(3,2) approximation, -- with relative error (omitting roundoff) < 1e-8. -- The interval would be [-1/2, 1/2] but we allow some slop. -- p0: constant := 41.694187; p1: constant := 17.342325; p2: constant := 3.0047081; p3: constant := 0.23083161; q0: constant := p0; -- assures 2**0 = 1 q1: constant := -11.5578823; begin return (p0 + x * (p1 + x * (p2 + x * p3))) / (q0 + x * (q1 + x)); end reduced_two_to_the; function two_to_the (x: in Float) return Float is -- -- return 2.0**x, underflowing to zero for very negative x, -- and overflowing for very positive x. -- ix: Float := floor( x + 0.5); frac: Float := x - ix; begin return scale( reduced_two_to_the( frac), integer( ix)); end two_to_the; function reduced_sin (x: in Float) return Float is -- -- return sin x for x in the interval [0,pi/4]. -- the coefficients are a chebychev-economized taylor series for -- sin((1-x)*pi/8)/((1-x)*pi/8) on [-1,1], -- with absolute error (omitting roundoff) < 2e-8. -- this gives high relative accuracy for small x. -- -- scale factor for size of interval [0,pi/4] already reckoned in below a0: constant := 9.74495337094254e-01; a1: constant := 1.28892138844516e-01; a2: constant := -1.59024045807998e-01; a3: constant := -1.28509163870500e-02; a4: constant := 7.83587003622433e-03; a5: constant := 4.55929708079455e-04; pio8: constant := pi / 8.0; y: constant Float := pio8 - x; begin return x * (a0 + y * (a1 + y * (a2 + y * (a3 + y * (a4 + y * a5))))); end reduced_sin; function reduced_cos (x: in Float) return Float is -- -- return cos x for x in the interval [0,pi/4]. -- the coefficients are a chebychev-economized taylor series for -- cos((1-x)*pi/8) on [-1,1], -- with absolute error (omitting roundoff) < 1.8e-9 -- -- scale factor for size of interval [0,pi/4] already reckoned in below a0: constant := 9.23879532410434e-01; a1: constant := 3.82683402033700e-01; a2: constant := -4.61939745322990e-01; a3: constant := -6.37789978530365e-02; a4: constant := 3.84943015536361e-02; a5: constant := 3.16859376215302e-03; a6: constant := -1.27611586733778e-03; pio8: constant := pi / 8.0; y: constant Float := pio8 - x; begin return a0 + y * (a1 + y * (a2 + y * (a3 + y * (a4 + y * (a5 + y * a6))))); end reduced_cos; function reduced_arctan (x: in Float) return Float is -- -- returns arctan x in radians from [-pi/4,pi/4] for x in [-1,1]. -- coefficents from Abramowitz and Stegun p81, #4.4.49 . -- relative error < 2e-8. -- x2: constant Float := x * x; -- a2: constant := -0.3333314528; a4: constant := 0.1999355085; a6: constant := -0.1420889944; a8: constant := 0.1065626393; a10: constant := -0.0752896400; a12: constant := 0.0429096138; a14: constant := -0.0161657367; a16: constant := 0.0028662257; begin return x * (1.0 + x2 * ( a2 + x2 * ( a4 + x2 * ( a6 + x2 * ( a8 + x2 * (a10 + x2 * (a12 + x2 * (a14 + x2 * a16)))))))); end reduced_arctan; ---------------------------------------------------------------------- -- implementations of user-callable fns function sqrt (x : Float) return Float is -- -- Declaration: -- function SQRT (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- SQRT(X) ~ sqrt(X) -- Usage: -- Z := SQRT(X); -- Domain: -- X >= 0.0 -- Range: -- SQRT(X) >= 0.0 -- Accuracy: -- (a) Maximum relative error = 2.0 * FLOAT_TYPE'BASE'EPSILON -- (b) SQRT(0.0) = 0.0 -- Notes: -- (a) The upper bound of the reachable range of SQRT is approximately given by -- SQRT(X)<=sqrt(FLOAT_TYPE'SAFE_LARGE) -- (b) Other standards might impose additional constraints on SQRT. For -- example, the IEEE standards for binary and radix-independent -- floating-point arithmetic require greater accuracy in the result of SQRT -- than this standard requires, and they require that SQRT(-0.0)=-0.0. -- An implementation of SQRT in GENERIC_ELEMENTARY_FUNCTIONS that -- conforms to this standard will conform to those other standards if it -- satisfies their additional constraints. -- begin if x > 0.0 then declare expo: integer := exponent( x); scaling: integer; -- for scaling of result reduced_arg, estimate: Float; -- -- minimax approximation constants from Hart p192, SQRT 0290 -- adjusted for an interval of [1/2,2] instead of [1/4,1]. p1: constant := 3.09033297; p0: constant := 1.0000233; q0: constant := 3.0903708; begin if expo >= 0 then scaling := expo / 2; else scaling := (expo - 1) / 2; end if; reduced_arg := scale( x, - 2 * scaling); -- 0.5 <= reduced_arg < 2.0 -- -- rational approx of order 1,1 to sqrt( reduced_arg) estimate := (p1 * reduced_arg + p0) / (reduced_arg + q0); -- which has 2**-8.6 max relative error. -- need only 2**-5.3 to support iteration to follow. -- Now apply Heron's iteration twice estimate := 0.5 * (estimate + reduced_arg / estimate); estimate := 0.5 * (estimate + reduced_arg / estimate); return scale( estimate, scaling); end; elsif x = 0.0 then return 0.0; else raise argument_error; end if; end sqrt; function log (x : Float) return Float is -- -- Declaration: -- function LOG (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- LOG(X) ~ ln(X) -- Usage: -- Z := LOG(X); -- natural logarithm -- Domain: -- X > 0.0 -- Range: -- Mathematically unbounded -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) LOG(1.0) = 0.0 -- Notes: -- The reachable range of LOG is approximately given by -- ln(FLOAT_TYPE'SAFE_SMALL) <= LOG(X) <= ln(FLOAT_TYPE'SAFE_LARGE) -- begin if x > 0.0 then -- log_2( 1.0) = 0.0 return log_2( x) * nat_log_2; else raise argument_error; end if; end log; function log (x, base : Float) return Float is -- -- Declaration: -- function LOG (X, BASE : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- LOG(X,BASE) ~ log to base BASE of X -- Usage: -- Z := LOG(X, 10.0); -- base 10 logarithm -- Z := LOG(X, 2.0); -- base 2 logarithm -- Z := LOG(X, BASE); -- base BASE logarithm -- Domain: -- (a) X > 0.0 -- (b) BASE > 0.0 -- (c) BASE /= 1.0 -- Range: -- Mathematically unbounded -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) LOG(1.0,BASE) = 0.0 -- Notes: -- (a) When BASE > 1.0, the reachable range of LOG is approximately given by -- log to base BASE of FLOAT_TYPE'SAFE_SMALL <= LOG(X,BASE) <= -- log to base BASE of FLOAT_TYPE'SAFE_LARGE -- (b) When 0.0 < BASE < 1.0, the reachable range of LOG is approximately given by -- log to base BASE of FLOAT_TYPE'SAFE_LARGE <= LOG(X,BASE) <= -- log to base BASE of FLOAT_TYPE'SAFE_SMALL -- begin if x > 0.0 and base > 0.0 and base /= 1.0 then -- log_2( 1.0) = 0.0 return log_2( x) / log_2( base); else raise argument_error; end if; end log; function exp (x : Float) return Float is -- -- Declaration: -- function EXP (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- EXP(X) ~ e raised to the X power -- Usage: -- Z := EXP(X); -- e raised to the power X -- Domain: -- Mathematically unbounded -- Range: -- EXP(X) >= 0.0 -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) EXP(0.0) = 1.0 -- Notes: -- The usable domain of EXP is approximately given by -- X <= ln(FLOAT_TYPE'SAFE_LARGE) -- minarg: constant Float := fminexp * nat_log_2; maxarg: constant Float := fmaxexp * nat_log_2; begin if x >= maxarg then raise overflow_error; elsif x < minarg then return 0.0; else declare r: Float; n: integer; begin ln2_reduce( x, r, n); return scale( reduced_two_to_the( r / nat_log_2), n); end; end if; end exp; function "**" (x, y : Float) return Float is -- -- Declaration: -- function "**" (X, Y : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- X ** Y ~ X raised to the power Y -- Usage: -- Z := X ** Y; -- X raised to the power Y -- Domain: -- (a) X >= 0.0 -- (b) Y > 0.0 when X = 0.0 -- Range: -- X ** Y >= 0.0 -- Accuracy: -- (a) Maximum relative error (when X > 0.0) = -- (4.0+|Y*ln(X)|/32.0) * FLOAT_TYPE'BASE'EPSILON -- (b) X ** 0.0 = 1.0 when X > 0.0 -- (c) 0.0 ** Y = 0.0 when Y > 0.0 -- (d) X ** 1.0 = X -- (e) 1.0 ** Y =1.0 -- Notes: -- The usable domain of "**", when X > 0.0, is approximately the set of -- values for X and Y satisfying -- Y*ln(X) <= ln(FLOAT_TYPE'SAFE_LARGE) -- This imposes a positive upper bound on Y (as a function of X) when -- X > 1.0, and a negative lower bound on Y (as a function of X) when -- 0.0 < X < 1.0. -- function half_precision (r: Float) return Float is -- truncate to half as many bits in the mantissa begin return shorten( r, Float'machine_mantissa / 2); end half_precision; pragma inline( half_precision); begin -- function "**" if x = 1.0 then return 1.0; -- 1**y elsif x > 0.0 then if y = 0.0 then return 1.0; -- x**0 elsif y = 1.0 then return x; -- x**1 else declare -- general case of 2**( y*log_2( x)) sy: Float := y; fex, ma, logx: Float; begin log_2_parts( fex, ma, x); logx := fex + ma; -- If y*log_2( x) overflows positively, we don't care -- at what point the overflow exception happens. -- Large negative values of y*log_2( x) are detected by -- the following conservative test, which is equivalent to -- sy * logx < 2 * fminexp, but will not overflow, -- because abs( logx) < 2 * abs( fminexp). if (sy / (2.0 * fminexp)) * logx > 1.0 then return 0.0; -- underflow else declare -- y*log_2( x) must have 7 extra bits of accuracy -- to get full precision in the mantissa of the result. -- so we split up the factors in two parts y1: constant Float := half_precision( sy); y2: constant Float := sy - y1; -- sy = y1 + y2 exactly w1: constant Float := half_precision( logx); w2: constant Float := (fex - w1) + ma; -- fex + ma = w1 + w2 exactly -- -- what we need is (y1+y2)*(w1+w2) very accurately y1w1: constant Float := y1*w1; -- exactly n: constant Float := floor( y1w1 + 0.5); frac: constant Float := (y1w1 - n) + (sy * w2 + y2 * w1); -- rounded to within 2**-Float'mantissa, -- frac is nearly confined to [-.5,.5] . begin return scale( reduced_two_to_the( frac), integer( n)); end; end if; end; end if; elsif x = 0.0 and y > 0.0 then return 0.0; -- 0**y else raise argument_error; -- x < 0 or x = 0 and y <= 0 end if; end "**"; function sin (x : Float) return Float is -- -- Declaration: -- function SIN (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- SIN(X) ~ sin(X) -- Usage: -- Z := SIN(X); -- X in radians -- Domain: -- Mathematically unbounded -- Range: -- |SIN(X)| <= 1.0 -- Accuracy: -- (a) Maximum relative error = 2.0 * FLOAT_TYPE'BASE'EPSILON -- when |X| is less than or equal to some documented implementation-dependent -- threshold, which must not be less than -- FLOAT_TYPE'MACHINE_RADIX ** (FLOAT_TYPE'MACHINE_MANTISSA/2) -- For larger values of |X|, degraded accuracy is allowed. An implementation -- must document its behavior for large |X|. -- (b) SIN(0.0) = 0.0 -- r: Float; oct: octant_number; begin octant_reduce( x, r, oct); case oct is when 0| 3 => return reduced_sin( r); when 1| 2 => return reduced_cos( r); when 4| 7 => return - reduced_sin( r); when 5| 6 => return - reduced_cos( r); end case; end sin; function sin (x, cycle : Float) return Float is -- -- Declaration: -- function SIN (X, CYCLE : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- SIN(X,CYCLE) ~ sin(2Pi * X/CYCLE) -- Usage: -- Z := SIN(X, 360.0); -- X in degrees -- Z := SIN(X, 1.0); -- X in bams (binary angular measure) -- Z := SIN(X, CYCLE); -- X in units such that one complete -- -- cycle of rotation corresponds to -- -- X = CYCLE -- Domain: -- (a) X mathematically unbounded -- (b) CYCLE > 0.0 -- Range: -- |SIN(X,CYCLE)| <= 1.0 -- Accuracy: -- (a) Maximum relative error = 2.0 * FLOAT_TYPE'BASE'EPSILON -- (b) For integer k, SIN(X,CYCLE)= 0.0 when X=k*CYCLE/2.0 -- 1.0 when X=(4k+1)*CYCLE/4.0 -- -1.0 when X=(4k+3)*CYCLE/4.0 -- r: Float; oct: octant_number; begin if cycle > 0.0 then period_reduce( x, cycle, r, oct); r := r / cycle * two_pi; -- underflow denormal allowed case oct is when 0| 3 => return reduced_sin( r); when 1| 2 => return reduced_cos( r); when 4| 7 => return - reduced_sin( r); when 5| 6 => return - reduced_cos( r); end case; else raise argument_error; end if; end sin; function cos (x : Float) return Float is -- -- Declaration: -- function COS (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- COS(X) ~ cos(X) -- Usage: -- Z := COS(X); -- X in radians -- Domain: -- Mathematically unbounded -- Range: -- |COS(X)| <= 1.0 -- Accuracy: -- (a) Maximum relative error = 2.0 * FLOAT_TYPE'BASE'EPSILON -- when |X| is less than or equal to some documented implementation-dependent -- threshold, which must not be less than -- FLOAT_TYPE'MACHINE_RADIX ** (FLOAT_TYPE'MACHINE_MANTISSA/2) -- For larger values of |X|, degraded accuracy is allowed. An implementation -- must document its behavior for large |X|. -- (b) COS(0.0) = 1.0 -- r: Float; oct: octant_number; begin octant_reduce( x, r, oct); case oct is when 0| 7 => return reduced_cos( r); when 1| 6 => return reduced_sin( r); when 2| 5 => return - reduced_sin( r); when 3| 4 => return - reduced_cos( r); end case; end cos; function cos (x, cycle : Float) return Float is -- -- Declaration: -- function COS (X, CYCLE : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- COS(X,CYCLE) ~ cos(2Pi*X/CYCLE) -- Usage: -- Z := COS(X, 360.0); -- X in degrees -- Z := COS(X, 1.0); -- X in bams -- Z := COS(X, CYCLE); -- X in units such that one complete -- -- cycle of rotation corresponds to -- -- X = CYCLE -- Domain: -- (a) X mathematically unbounded -- (b) CYCLE > 0.0 -- Range: -- |COS(X,CYCLE)| <= 1.0 -- Accuracy: -- (a) Maximum relative error = 2.0 * FLOAT_TYPE'BASE'EPSILON -- (b) For integer k, COS(X,CYCLE) = 1.0 when X=k*CYCLE -- 0.0 when X=(2k+1)*CYCLE/4.0 -- -1.0 when X=(2k+1)*CYCLE/2.0 -- r: Float; oct: octant_number; begin if cycle > 0.0 then period_reduce( x, cycle, r, oct); r := r / cycle * two_pi; -- underflow denormal allowed case oct is when 0| 7 => return reduced_cos( r); when 1| 6 => return reduced_sin( r); when 2| 5 => return - reduced_sin( r); when 3| 4 => return - reduced_cos( r); end case; else raise argument_error; end if; end cos; function tan (x : Float) return Float is -- -- Declaration: -- function TAN (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- TAN(X) ~ tan(X) -- Usage: -- Z := TAN(X); -- X in radians -- Domain: -- Mathematically unbounded -- Range: -- Mathematically unbounded -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- when |X| is less than or equal to some documented implementation-dependent -- threshold, which must not be less than -- FLOAT_TYPE'MACHINE_RADIX ** (FLOAT_TYPE'MACHINE_MANTISSA/2) -- For larger values of |X|, degraded accuracy is allowed. An implementation -- must document its behavior for large |X|. -- (b) TAN(0.0) = 0.0 -- r: Float; oct: octant_number; begin octant_reduce( x, r, oct); case oct is when 0| 4 => return reduced_sin( r) / reduced_cos( r); when 1| 5 => if r /= 0.0 then return reduced_cos( r) / reduced_sin( r); else -- x must have been huge return 0.0; end if; when 2| 6 => if r /= 0.0 then return - reduced_cos( r) / reduced_sin( r); else -- x must have been huge return 0.0; end if; when 3| 7 => return - reduced_sin( r) / reduced_cos( r); end case; end tan; function tan (x, cycle : Float) return Float is -- -- Declaration: -- function TAN (X, CYCLE : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- TAN(X,CYCLE) ~ tan(2Pi*X/CYCLE) -- Usage: -- Z := TAN(X, 360.0); -- X in degrees -- Z := TAN(X, 1.0); -- X in bams -- Z := TAN(X, CYCLE); -- X in units such that one complete -- -- cycle of rotation corresponds to -- -- X = CYCLE -- Domain: -- (a) X /= (2k+1)*CYCLE/4.0, for integer k -- (b) CYCLE > 0.0 -- Range: -- Mathematically unbounded -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) TAN(X,CYCLE) = 0.0 when X=k*CYCLE/2.0, for integer k -- r: Float; oct: octant_number; begin if cycle > 0.0 then period_reduce( x, cycle, r, oct); r := r / cycle * two_pi; -- underflow denormal allowed case oct is when 0| 4 => return reduced_sin( r) / reduced_cos( r); when 1| 5 => if r /= 0.0 then return reduced_cos( r) / reduced_sin( r); else raise argument_error; end if; when 2| 6 => if r /= 0.0 then return - reduced_cos( r) / reduced_sin( r); else raise argument_error; end if; when 3| 7 => return - reduced_sin( r) / reduced_cos( r); end case; else raise argument_error; end if; end tan; function cot (x : Float) return Float is -- -- Declaration: -- function COT (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- COT(X) ~ cot(X) -- Usage: -- Z := COT(X); -- X in radians -- Domain: -- X /= 0.0 -- Range: -- Mathematically unbounded -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- when |X| is less than or equal to some documented implementation-dependent -- threshold, which must not be less than -- FLOAT_TYPE'MACHINE_RADIX ** (FLOAT_TYPE'MACHINE_MANTISSA/2) -- For larger values of |X|, degraded accuracy is allowed. An implementation -- must document its behavior for large |X|. -- r: Float; oct: octant_number; begin if x /= 0.0 then octant_reduce( x, r, oct); case oct is when 0| 4 => if r /= 0.0 then return reduced_cos( r) / reduced_sin( r); else -- x must have been huge return 0.0; end if; when 1| 5 => return reduced_sin( r) / reduced_cos( r); when 2| 6 => return - reduced_sin( r) / reduced_cos( r); when 3| 7 => if r /= 0.0 then return - reduced_cos( r) / reduced_sin( r); else -- x must have been huge return 0.0; end if; end case; else raise argument_error; end if; end cot; function cot (x, cycle : Float) return Float is -- -- Declaration: -- function COT (X, CYCLE : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- COT(X,CYCLE) ~ cot(2Pi*X/CYCLE) -- Usage: -- Z := COT(X, 360.0); -- X in degrees -- Z := COT(X, 1.0); -- X in bams -- Z := COT(X, CYCLE); -- X in units such that one complete -- -- cycle of rotation corresponds to -- -- X = CYCLE -- Domain: -- (a) X /= k*CYCLE/2.0, for integer k -- (b) CYCLE > 0.0 -- Range: -- Mathematically unbounded -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) COT(X,CYCLE) = 0.0 when X=(2k+1)*CYCLE/4.0, for integer k -- r: Float; oct: octant_number; begin if cycle > 0.0 then period_reduce( x, cycle, r, oct); r := r / cycle * two_pi; -- underflow denormal allowed case oct is when 0| 4 => if r /= 0.0 then return reduced_cos( r) / reduced_sin( r); else raise argument_error; end if; when 1| 5 => return reduced_sin( r) / reduced_cos( r); when 2| 6 => return - reduced_sin( r) / reduced_cos( r); when 3| 7 => if r /= 0.0 then return - reduced_cos( r) / reduced_sin( r); else raise argument_error; end if; end case; else raise argument_error; end if; end cot; function arcsin (x : Float) return Float is -- -- Declaration: -- function ARCSIN (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- ARCSIN(X) ~ arcsine(X) -- Usage: -- Z := ARCSIN(X); -- Z in radians -- Domain: -- |X| <= 1.0 -- Range: -- |ARCSIN(X)| <= Pi/2 -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCSIN(0.0) = 0.0 -- (c) ARCSIN(1.0) = Pi/2 -- (d) ARCSIN(-1.0) = -Pi/2 -- Notes: -- - Pi/2 and Pi/2 are not safe numbers of FLOAT_TYPE. Accordingly, -- an implementation may exceed the range limits, but only slightly; -- cf. Section 9 for a precise statement of the requirements. Similarly, -- when accuracy requirement (c) or (d) applies, an implementation may -- approximate the prescribed result, but only within narrow limits; -- cf. Section 10 for a precise statement of the requirements. -- begin if abs( x) <= 1.0 then return arctan( y => x, x => complement( x)); else raise argument_error; end if; end arcsin; function arcsin (x, cycle : Float) return Float is -- -- Declaration: -- function ARCSIN (X, CYCLE : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- ARCSIN(X,CYCLE) ~ arcsin(X)*CYCLE/2Pi -- Usage: -- Z := ARCSIN(X, 360.0); -- Z in degrees -- Z := ARCSIN(X, 1.0); -- Z in bams -- Z := ARCSIN(X, CYCLE); -- Z in units such that one complete -- -- cycle of rotation corresponds to -- -- Z = CYCLE -- Domain: -- (a) |X| <= 1.0 -- (b) CYCLE > 0.0 -- Range: -- |ARCSIN(X,CYCLE) <= CYCLE/4.0 -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCSIN(0.0,CYCLE) = 0.0 -- (c) ARCSIN(1.0,CYCLE) = CYCLE/4.0 -- (d) ARCSIN(-1.0,CYCLE) = -CYCLE/4.0 -- Notes: -- - CYCLE/4.0 and CYCLE/4.0 -- might not be safe numbers of FLOAT_TYPE. Accordingly, -- an implementation may exceed the range limits, but only slightly; -- cf. Section 9 for a precise statement of the requirements. Similarly, -- when accuracy requirement (c) or (d) applies, an implementation may -- approximate the prescribed result, but only within narrow limits; -- cf. Section 10 for a precise statement of the requirements. -- begin if abs( x) <= 1.0 and cycle > 0.0 then return arctan( y => x, x => complement( x), cycle => cycle); else raise argument_error; end if; end arcsin; function arccos (x : Float) return Float is -- -- Declaration: -- function ARCCOS (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- ARCCOS(X) ~ arccos(X) -- Usage: -- Z := ARCCOS(X); -- Z in radians -- Domain: -- |X| <= 1.0 -- Range: -- 0.0 <= ARCCOS(X) <= Pi -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCCOS(1.0) = 0.0 -- (c) ARCCOS(0.0) = Pi/2 -- (d) ARCCOS(-1.0) = Pi -- Notes: -- Pi/2 and Pi are not safe numbers of FLOAT_TYPE. Accordingly, -- an implementation may exceed the range limits, but only slightly; -- cf. Section 9 for a precise statement of the requirements. Similarly, -- when accuracy requirement (c) or (d) applies, an implementation may -- approximate the prescribed result, but only within narrow limits; -- cf. Section 10 for a precise statement of the requirements. -- begin if abs( x) <= 1.0 then return arctan( y => complement( x), x => x); else raise argument_error; end if; end arccos; function arccos (x, cycle : Float) return Float is -- -- Declaration: -- function ARCCOS (X, CYCLE : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- ARCCOS(X,CYCLE) ~ arccos(X)*CYCLE/2Pi -- Usage: -- Z := ARCCOS(X, 360.0); -- Z in degrees -- Z := ARCCOS(X, 1.0); -- Z in bams -- Z := ARCCOS(X, CYCLE); -- Z in units such that one complete -- -- cycle of rotation corresponds to -- -- Z = CYCLE -- Domain: -- (a) |X| <= 1.0 -- (b) CYCLE > 0.0 -- Range: -- 0.0 <= ARCCOS(X,CYCLE) <= CYCLE/2.0 -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCCOS(1.0,CYCLE) = 0.0 -- (c) ARCCOS(0.0,CYCLE) = CYCLE/4.0 -- (d) ARCCOS(-1.0,CYCLE) = CYCLE/2.0 -- Notes: -- CYCLE/4.0 and CYCLE/2.0 -- might not be safe numbers of FLOAT_TYPE. Accordingly, -- an implementation may exceed the range limits, but only slightly; -- cf. Section 9 for a precise statement of the requirements. Similarly, -- when accuracy requirement (c) or (d) applies, an implementation may -- approximate the prescribed result, but only within narrow limits; -- cf. Section 10 for a precise statement of the requirements. -- begin if abs( x) <= 1.0 and cycle > 0.0 then return arctan( y => complement( x), x => x, cycle => cycle); else raise argument_error; end if; end arccos; function arctan (y : Float; x : Float := 1.0) return Float is -- -- Declaration: -- function ARCTAN (Y : FLOAT_TYPE; -- X : FLOAT_TYPE := 1.0) return FLOAT_TYPE; -- Definition: -- (a) ARCTAN(Y) ~ arctan(Y) -- (b) ARCTAN(Y,X) ~ arctan(Y/X) when X >= 0.0 -- arctan(Y/X)+Pi when X < 0.0 and Y >= 0.0 -- arctan(Y/X)-Pi when X < 0.0 and Y < 0.0 -- Usage: -- Z := ARCTAN(Y); -- Z, in radians, is the angle (in the -- -- quadrant containing the point (1.0,Y)) -- -- whose tangent is Y -- Z := ARCTAN(Y, X); -- Z, in radians, is the angle (in the -- -- quadrant containing the point (X,Y)) -- -- whose tangent is Y/X -- Domain: -- X /= 0.0 when Y = 0.0 -- Range: -- (a) |ARCTAN(Y)| <= Pi/2 -- (b) 0.0 < ARCTAN(Y,X) <= Pi when Y >= 0.0 -- (c) -Pi <= ARCTAN(Y,X) <= 0.0 when Y < 0.0 -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCTAN(0.0) = 0.0 -- (c) ARCTAN((0.0,X) = 0.0 when X > 0.0 -- Pi when X < 0.0 -- (d) ARCTAN(Y,0.0) = Pi/2 when Y > 0.0 -- -Pi/2 when Y < 0.0 -- Notes: -- -Pi,-Pi/2,Pi/2 and Pi are not safe numbers of FLOAT_TYPE. Accordingly, -- an implementation may exceed the range limits, but only slightly; -- cf. Section 9 for a precise statement of the requirements. Similarly, -- when accuracy requirement (c) or (d) applies, an implementation may -- approximate the prescribed result, but only within narrow limits; -- cf. Section 10 for a precise statement of the requirements. -- begin if x = 0.0 and y = 0.0 then raise argument_error; elsif x >= y then if x >= - y then return reduced_arctan( y / x); -- near +x axis else return - half_pi - reduced_arctan( x / y); -- near -y axis end if; elsif x >= - y then return half_pi - reduced_arctan( x / y); -- near +y axis elsif y >= 0.0 then return pi + reduced_arctan( y / x); -- just above -x axis else return - pi + reduced_arctan( y / x); -- just below -x axis end if; end arctan; function arctan (y : Float; x : Float := 1.0; cycle : Float) return Float is -- -- Declaration: -- function ARCTAN (Y : FLOAT_TYPE; -- X : FLOAT_TYPE := 1.0; -- CYCLE : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- (a) ARCTAN(Y,CYCLE) ~ arctan(Y)*CYCLE/2Pi -- (b) ARCTAN(Y,X,CYCLE) ~ arctan(Y/X)*CYCLE/2Pi when X >= 0.0 -- (arctan(Y/X)+Pi)*CYCLE/2Pi when X < 0.0 and Y >= 0.0 -- (arctan(Y/X)-Pi)*CYCLE/2Pi when X < 0.0 and Y < 0.0 -- Usage: -- Z := ARCTAN(Y, CYCLE => 360.0); -- Z, in degrees, is the -- -- angle (in the quadrant -- -- containing the point -- -- (1.0,Y)) whose tangent is Y -- Z := ARCTAN(Y, CYCLE => 1.0); -- Z, in bams, is the -- -- angle (in the quadrant -- -- containing the point -- -- (1.0,Y)) whose tangent is Y -- Z := ARCTAN(Y, CYCLE => CYCLE); -- Z, in units such that one -- -- complete cycle of rotation -- -- corresponds to Z = CYCLE, -- -- is the angle (in the -- -- quadrant containing the -- -- point (1.0,Y)) whose -- -- tangent is Y -- Z := ARCTAN(Y, X, 360.0); -- Z, in degrees, is the -- -- angle (in the quadrant -- -- containing the point (X,Y)) -- -- whose tangent is Y/X -- Z := ARCTAN(Y, X, 1.0); -- Z, in bams, is the -- -- angle (in the quadrant -- -- containing the point (X,Y)) -- -- whose tangent is Y/X -- Z := ARCTAN(Y, X, CYCLE); -- Z, in units such that one -- -- complete cycle of rotation -- -- corresponds to Z = CYCLE, -- -- is the angle (in the -- -- quadrant containing the -- -- point (X,Y)) whose -- -- tangent is Y/X -- Domain: -- (a) X /= 0.0 when Y = 0.0 -- (b) CYCLE > 0.0 -- Range: -- (a) |ARCTAN(Y,CYCLE)| <= CYCLE/4.0 -- (b) 0.0 <= ARCTAN(Y,X,CYCLE) <= CYCLE/2.0 when Y >= 0.0 -- (c) -CYCLE/2.0 <= ARCTAN(Y,X,CYCLE) <= 0.0 when Y < 0.0 -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCTAN(0.0,CYCLE) = 0.0 -- (c) ARCTAN(0.0,X,CYCLE) = 0.0 when X > 0.0 -- CYCLE/2.0 when X < 0.0 -- (d) ARCTAN(Y,0.0,CYCLE) = CYCLE/4.0 when Y > 0.0 -- -CYCLE/4.0 when Y < 0.0 -- Notes: -- -CYCLE/2.0, -CYCLE/4.0, CYCLE/4.0 and CYCLE/2.0 -- might not be safe numbers of FLOAT_TYPE. Accordingly, -- an implementation may exceed the range limits, but only slightly; -- cf. Section 9 for a precise statement of the requirements. Similarly, -- when accuracy requirement (c) or (d) applies, an implementation may -- approximate the prescribed result, but only within narrow limits; -- cf. Section 10 for a precise statement of the requirements. -- scale: constant Float := cycle / two_pi; begin -- what happens if scale underflows? cycle/4 underflows? if cycle <= 0.0 or else (x = 0.0 and y = 0.0) then raise argument_error; elsif x >= y then if x >= - y then -- near +x axis return scale * reduced_arctan( y / x); else -- near -y axis return - cycle / 4.0 - scale * reduced_arctan( x / y); end if; elsif x >= - y then -- near +y axis return cycle / 4.0 - scale * reduced_arctan( x / y); elsif y >= 0.0 then -- just above -x axis return cycle / 2.0 + scale * reduced_arctan( y / x); else -- just below -x axis return - cycle / 2.0 + scale * reduced_arctan( y / x); end if; end arctan; function arccot (x : Float; y : Float := 1.0) return Float is -- -- Declaration: -- function ARCCOT (X : FLOAT_TYPE; -- Y : FLOAT_TYPE := 1.0) return FLOAT_TYPE; -- Definition: -- (a) ARCCOT(X) ~ arccot(X) -- (b) ARCCOT(X,Y) ~ arccot(X/Y) when Y >= 0.0 -- arccot(X/Y)-Pi when Y < 0.0 -- Usage: -- Z := ARCCOT(X); -- Z, in radians, is the angle (in the -- -- quadrant containing the point (X,1.0) -- -- whose cotangent is X -- Z := ARCCOT(X, Y); -- Z, in radians, is the angle (in the -- -- quadrant containing the point (X,Y)) -- -- whose cotangent is X/Y -- Domain: -- Y /= 0.0 when X = 0.0 -- Range: -- (a) 0.0 <= ARCCOT(X) <= Pi -- (b) 0.0 <= ARCCOT(X,Y) <= Pi when Y >= 0.0 -- (c) -Pi <= ARCCOT(X,Y) <= 0.0 when Y < 0.0 -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCCOT(0.0) = Pi/2 -- (c) ARCCOT(0.0,Y) = Pi/2 when Y > 0.0 -- -Pi/2 when Y < 0.0 -- (d) ARCCOT(X,0.0) = 0.0 when X > 0.0 -- Pi when X < 0.0 -- Notes: -- -Pi,-Pi/2,Pi/2 and Pi are not safe numbers of FLOAT_TYPE. Accordingly, -- an implementation may exceed the range limits, but only slightly; -- cf. Section 9 for a precise statement of the requirements. Similarly, -- when accuracy requirement (b), (c), or (d) applies, an implementation may -- approximate the prescribed result, but only within narrow limits; -- cf. Section 10 for a precise statement of the requirements. -- begin return arctan( y => y, x => x); end arccot; function arccot (x : Float; y : Float := 1.0; cycle : Float) return Float is -- -- Declaration: -- function ARCCOT (X : FLOAT_TYPE; -- Y : FLOAT_TYPE := 1.0; -- CYCLE : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- (a) ARCCOT(X,CYCLE) ~ arccot(X)*CYCLE/2Pi -- (b) ARCCOT(X,Y) ~ arccot(X/Y)*CYCLE/2Pi when Y >= 0.0 -- (arccot(X/Y)-Pi)*CYCLE/2Pi -- Usage: -- Z := ARCCOT(X, CYCLE => 360.0); -- Z, in degrees, is the -- -- angle (in the quadrant -- -- containing the point -- -- (X,1.0)) whose cotangent -- -- is X -- Z := ARCCOT(X, CYCLE => 1.0); -- Z, in bams, is the -- -- angle (in the quadrant -- -- containing the point -- -- (X,1.0)) whose cotangent -- -- is X -- Z := ARCCOT(X, CYCLE => CYCLE); -- Z, in units such that one -- -- complete cycle of rotation -- -- corresponds to Z = CYCLE, -- -- is the angle (in the -- -- quadrant containing the -- -- point (X,1.0)) whose -- -- cotangent is X -- Z := ARCCOT(X, Y, 360.0); -- Z, in degrees, is the -- -- angle (in the quadrant -- -- containing the point (X,Y)) -- -- whose cotangent is X/Y -- Z := ARCCOT(X, Y, 1.0); -- Z, in bams, is the -- -- angle (in the quadrant -- -- containing the point (X,Y) -- -- whose cotangent is X/Y -- Z := ARCCOT(X, Y, CYCLE); -- Z, in units such that one -- -- complete cycle of rotation -- -- corresponds to Z = CYCLE -- -- is the angle (in the -- -- quadrant containing the -- -- point (X,Y)) whose -- -- cotangent is X/Y -- Domain: -- (a) Y /= 0.0 when X = 0.0 -- (b) CYCLE > 0.0 -- Range: -- (a) 0.0 <= ARCCOT((X,CYCLE) <= CYCLE/2.0 -- (b) 0.0 <= ARCCOT(X,Y,CYCLE) <= CYCLE/2.0 when Y >= 0.0 -- (c) -CYCLE/2.0 <= ARCCOT(X,Y,CYCLE) <= 0.0 when Y < 0.0 -- Accuracy: -- (a) Maximum relative error = 4.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCCOT(0.0,CYCLE) = CYCLE/4.0 -- (c) ARCCOT(0.0,Y,CYCLE) = CYCLE/4.0 when Y > 0.0 -- -CYCLE/4.0 when Y < 0.0 -- (d) ARCCOT(X,0.0,CYCLE) = 0.0 when X > 0.0 -- CYCLE/2.0 when X < 0.0 -- Notes: -- - CYCLE/2.0, - CYCLE/4.0, CYCLE/4.0 and CYCLE/2.0 -- might not be safe numbers of FLOAT_TYPE. Accordingly, -- an implementation may exceed the range limits, but only slightly; -- cf. Section 9 for a precise statement of the requirements. Similarly, -- when accuracy requirement (b), (c), or (d) applies, an implementation may -- approximate the prescribed result, but only within narrow limits; -- cf. Section 10 for a precise statement of the requirements. -- begin return arctan( y => y, x => x, cycle => cycle); end arccot; function sinh (x : Float) return Float is -- -- Declaration: -- function SINH (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- SINH(X) ~ sinh X -- Usage: -- Z := SINH(X); -- Domain: -- Mathematically unbounded -- Range: -- Mathematically unbounded -- Accuracy: -- (a) Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON -- (b) SINH(0.0) = 0.0 -- Notes: -- The usable domain of SINH is approximately given by -- |X| <= ln(FLOAT_TYPE'SAFE_LARGE)+ln(2.0) -- ax: constant Float := abs( x); meet_exp: constant := (mant + 1) * nat_log_2 / 2.0; begin if ax > meet_exp then -- essentially +-exp(ax)/2, but must round and overflow properly declare ex: constant Float := (exp_1 / 2.0) * exp( ax - 1.0); begin if x >= 0.0 then return ex; else return - ex; end if; end; elsif abs( x) <= 0.25 then -- small args need an economized taylor series, relerr<1.5e-9. declare a1: constant := 1.00000000151607e+00; a3: constant := 1.66666230112174e-01; a5: constant := 8.35195337329340e-03; x2: constant Float := x * x; begin return x * (a1 + x2 * (a3 + x2 * a5)); end; else -- 1/4 < abs x <= meet_exp declare ex: constant Float := exp( x); begin return 0.5 * (ex - 1.0 / ex); end; end if; end sinh; function cosh (x : Float) return Float is -- -- Declaration: -- function COSH (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- COSH(X) ~ cosh X -- Usage: -- Z := COSH(X); -- Domain: -- Mathematically unbounded -- Range: -- COSH(X) >= 1.0 -- Accuracy: -- (a) Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON -- (b) COSH(0.0) = 1.0 -- Notes: -- The usable domain of COSH is approximately given by -- |X| <= ln(FLOAT_TYPE'SAFE_LARGE)+ln(2.0) -- ax: constant Float := abs( x); meet_exp: constant := (mant + 1) * nat_log_2 / 2.0; begin if ax > meet_exp then -- essentially exp(ax)/2, but must round and overflow properly return (exp_1 / 2.0) * exp( ax - 1.0); else declare ex: constant Float := exp( x); -- moderate size begin return 0.5 * (ex + 1.0 / ex); end; end if; end cosh; function tanh (x : Float) return Float is -- -- Declaration: -- function TANH (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- TANH(X) ~ tanh X -- Usage: -- Z := TANH(X); -- Domain: -- Mathematically unbounded -- Range: -- |TANH(X)| <= 1.0 -- Accuracy: -- (a) Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON -- (b) TANH(0.0) = 0.0 -- ax: constant Float := abs( x); meet_exp: constant := (mant + 1) * nat_log_2 / 2.0; begin -- check whether exp(-ax) negligible wrt exp(ax) if ax > meet_exp then if x >= 0.0 then return 1.0; else return -1.0; end if; elsif ax <= 0.25 then -- small args need an economized taylor series, relerr<2.5e-9. declare x2: constant Float := x * x; a0: constant := 9.99999997535969e-01; a2: constant := -3.33332067277568e-01; a4: constant := 1.33231448939255e-01; a6: constant := -5.13290121422199e-02; begin return x * (a0 + x2 * (a2 + x2 * (a4 + x2 * a6))); end; else -- 1/4 < abs x <= meet_exp declare ex: Float := exp( x); begin return (ex - 1.0 / ex) / (ex + 1.0 / ex); end; end if; end tanh; function coth (x : Float) return Float is -- -- Declaration: -- function COTH (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- COTH(X) ~ coth X -- Usage: -- Z := COTH(X); -- Domain: -- X /= 0.0 -- Range: -- |COTH(X)| >= 1.0 -- Accuracy: -- Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON -- begin if x /= 0.0 then return 1.0 / tanh( x); else raise argument_error; end if; end coth; function arcsinh (x : Float) return Float is -- -- Declaration: -- function ARCSINH (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- ARCSINH(X) ~ arcsinh X -- Usage: -- Z := ARCSINH(X); -- Domain: -- Mathematically unbounded -- Range: -- Mathematically unbounded -- Accuracy: -- (a) Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCSINH(0.0) = 0.0 -- Notes: -- The reachable range of ARCSINH is approximately given by -- |ARCSINH(X)| <= ln(FLOAT_TYPE'SAFE_LARGE)+ln(2.0) -- ax: Float := abs( x); as: Float; reduce_edge: constant := sqrt_2 / 4.0; -- see reduced_logdel_2 for its domain definition begin if ax >= recip_sqrt_epsilon then as := 1.0 + log_2( ax); elsif ax >= reduce_edge then as := log_2( ax + sqrt( 1.0 + sqr(ax))); elsif ax > Float'base'epsilon then -- ax < sqrt(1/8) -- z = x+sqrt(1+x**2) -- (z-1)/(z+1) = x/(1+sqrt(1+x**2)) as := reduced_logdel_2( ax / (1.0 + sqrt( 1.0 + sqr( ax)))); else return x; -- sign already correct for small args end if; if x >= 0.0 then return nat_log_2 * as; else return - nat_log_2 * as; end if; end arcsinh; function arccosh (x : Float) return Float is -- -- Declaration: -- function ARCCOSH (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- ARCCOSH(X) ~ arccosh X -- Usage: -- Z := ARCCOSH(X); -- Domain: -- X >= 1.0 -- Range: -- ARCCOSH(X) >= 0.0 -- Accuracy: -- (a) Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCCOSH(1.0) = 0.0 -- Notes: -- The upper bound of the reachable range of ARCCOSH is approximately given -- by ARCCOSH(X) <= ln(FLOAT_TYPE'SAFE_LARGE)+ln(2.0) -- reduce_edge: constant := 0.75 * sqrt_2; -- see reduced_logdel_2 for its domain definition begin if x >= recip_sqrt_epsilon then return nat_log_2 * (1.0 + log_2( x)); elsif x >= reduce_edge then return nat_log_2 * log_2( x + sqrt( sqr( x) - 1.0)); elsif x >= 1.0 then -- z = x+sqrt(x**2-1) -- (z-1)/(z+1) = sqrt((x-1)/(x+1)) return nat_log_2 * reduced_logdel_2( sqrt( (x - 1.0) / (x + 1.0) )); else raise argument_error; end if; end arccosh; function arctanh (x : Float) return Float is -- -- Declaration: -- function ARCTANH (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- ARCTANH(X) ~ arctanh X -- Usage: -- Z := ARCTANH(X); -- Domain: -- |X| < 1.0 -- Range: -- Mathematically unbounded -- Accuracy: -- (a) Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON -- (b) ARCTANH(0.0) = 0.0 -- ax: constant Float := abs( x); half_ln_2: constant := nat_log_2 / 2.0; reduce_edge: constant := 1.0 / (3.0 + 2.0 * sqrt_2); -- see reduced_logdel_2 for its domain definition begin if ax >= 1.0 then raise argument_error; elsif ax < reduce_edge then return reduced_logdel_2( x) * half_ln_2; else return log_2( (1.0 + x) / (1.0 - x)) * half_ln_2; end if; end arctanh; function arccoth (x : Float) return Float is -- -- Declaration: -- function ARCCOTH (X : FLOAT_TYPE) return FLOAT_TYPE; -- Definition: -- ARCCOTH(X) ~ arccoth X -- Usage: -- Z := ARCCOTH(X); -- Domain: -- |X| > 1.0 -- Range: -- Mathematically unbounded -- Accuracy: -- Maximum relative error = 8.0 * FLOAT_TYPE'BASE'EPSILON -- ax: constant Float := abs( x); half_ln_2: constant := nat_log_2 / 2.0; reduce_edge: constant := 3.0 + 2.0 * sqrt_2; -- see reduced_logdel_2 for its domain definition begin if ax <= 1.0 then raise argument_error; elsif ax > reduce_edge then return reduced_logdel_2( 1.0 / x) * half_ln_2; else -- 1 < ax <= 3+2*sqrt2 return log_2( (x + 1.0) / (x - 1.0)) * half_ln_2; end if; end arccoth; end Math; -- $Header: s_elementary_functions_b.ada,v 3.13 90/04/19 12:34:31 broman Rel $